library(tidyverse)
library(plotly)
library(sf)
library(tigris)
library(leaflet)

September 22,2020

  1. Create bar or line charts showing monthly total kBTUs of residential and commercial electricity and gas consumption for the Bay Area (meaning the sum of all ZIP codes in the 9 Bay Area counties) from 2017 to the latest available month (meaning a 42-column version of the plots from section 1.8). Look online for the correct conversion of kWhs to kBTUs and therms to kBTUs. Make sure that electricity and gas data are distinguishable but plotted in the same chart; feel free to separate your analyses for residential and commercial into two separate charts if you believe it improves legibility, or do one chart with 4 colors in the legend

##kBTU Bar Graph depecting electricy and gas use in the bay for the last 3 yars

setwd("~/Desktop/Working_Directory/BayArea/HW1")
year <- 2017
quarters <- 1:14
type <- "Gas"

pge_gas <- NULL
quarter = 1

for(run  in quarters) {
  if ((run == 5) | (run == 9) | (run == 13)){
    year = year + 1
    quarter = quarter - 4
  }
  if ((quarter == 3) & (year == 2020)){
    break
  }

  filename <-
    paste0(
      "PGE_",
      year,
      "_Q",
      quarter,
      "_",
      type,
      "UsageByZip.csv"
    )
  temp <- read_csv(filename)

  pge_gas <- rbind(pge_gas,temp)
  quarter = quarter + 1
}
year <- 2017
quarters <- 1:14
type <- "Electric"

pge_electric <- NULL
quarter = 1

for(run  in quarters) {
  if ((run == 5) | (run == 9) | (run == 13)){
    year = year + 1
    quarter = quarter - 4
  }
  if ((quarter == 3) & (year == 2020)){
    break
  }

  filename <-
    paste0(
      "PGE_",
      year,
      "_Q",
      quarter,
      "_",
      type,
      "UsageByZip.csv"
    )
  temp <- read_csv(filename)

  pge_electric <- rbind(pge_electric,temp)
  quarter = quarter + 1
}

pge_filter_elec <- filter(pge_electric, CUSTOMERCLASS %in% c("Elec- Residential","Elec- Commercial"))
pge_filter_gas <- filter(pge_gas, CUSTOMERCLASS %in% c("Gas- Residential","Gas- Commercial"))

pge_select_elec <- mutate(pge_filter_elec, MONTH = MONTH + (12*(YEAR - 2017)) )
pge_select_gas <- mutate(pge_filter_gas, MONTH = MONTH + (12*(YEAR - 2017)) )
pge_select_elec <-select(pge_select_elec  ,!c( COMBINED, AVERAGEKWH))
pge_select_gas <-select(pge_select_gas ,!c( COMBINED, AVERAGETHM))
rm(pge_filter_elec, pge_filter_gas)

pge_group_elec <- group_by( pge_select_elec, MONTH, CUSTOMERCLASS, YEAR)
pge_group_gas <- group_by( pge_select_gas, MONTH, CUSTOMERCLASS, YEAR)
rm(pge_select_elec,pge_select_gas)

pge_summarize_elec <- summarize( pge_group_elec, TOTALKWH = sum( TOTALKWH, na.rm = T),
                                 TOTALCUSTOMERS = sum(TOTALCUSTOMERS, na.rm = T))
pge_summarize_gas <- summarize( pge_group_gas, TOTALTHM = sum( TOTALTHM, na.rm = T),
                                TOTALCUSTOMERS = sum(TOTALCUSTOMERS, na.rm = T))


rm(pge_group_elec,pge_group_gas)

pge_mutate_elec <- mutate(pge_summarize_elec, AVERAGEkBTU = (TOTALKWH * 3412.14 ) /TOTALCUSTOMERS)
pge_mutate_gas  <- mutate(pge_summarize_gas , AVERAGEkBTU = (TOTALTHM * 99976.1 )/TOTALCUSTOMERS)

rm(pge_summarize_elec,pge_summarize_gas)

pgeELECT <-select(pge_mutate_elec  ,-TOTALKWH)
pgeGAS <-select(pge_mutate_gas  , -TOTALTHM )
pge_final<- rbind(pgeELECT,pgeGAS)
rm(pge_mutate_elec,pge_mutate_gas,pgeELECT,pgeGAS)

pge_chart <-
  pge_final %>% 
  ggplot() +
  geom_bar(
    aes(
      # x = MONTH %>% factor(),
      x = factor(MONTH),
      y = AVERAGEkBTU,
      fill = CUSTOMERCLASS
    ),
    stat = "identity",
    position = "stack"
    ) +
  labs(
    x = "Months Starting Jan 2017",
    y = "kBTU",
    title = "PG&E Territory Monthly Electricity and Gas Usage, 2017-Present",
    fill = "Electricity and Gas"
  )+
  coord_flip()+
  theme(
    legend.position = "bottom",
    legend.direction = "vertical"
  )
pge_chart %>% ggplotly() %>% config(displayModeBar = F)
#plot(pge_chart)


pge_filter_elecNEI <- filter(pge_electric, CUSTOMERCLASS %in% c("Elec- Residential"))
pge_filter_elecNEI <- filter(pge_filter_elecNEI, YEAR %in% c("2019", "2020"))
ElectMonth6 <- filter(pge_filter_elecNEI , MONTH %in% c("6"))
ElectMonth6  <-select(ElectMonth6  ,!c( COMBINED, AVERAGEKWH,CUSTOMERCLASS, MONTH))
ElectMonth6Group <- group_by( ElectMonth6, YEAR, ZIPCODE)
ElectMonth6Sum <- summarize( ElectMonth6Group, TOTALKWH = sum( TOTALKWH, na.rm = T),TOTALCUSTOMERS = sum(TOTALCUSTOMERS, na.rm = T))
Customers <- ElectMonth6Sum$TOTALCUSTOMERS
zeroCustomers<- which(Customers %in% 0)
ElectMonth6Sum <-ElectMonth6Sum[-c(zeroCustomers),]
ElecMutate<- mutate(ElectMonth6Sum, AVERAGEKWH = TOTALKWH /TOTALCUSTOMERS)

Observable changes in energy consumption that may be attributable to the COVID-19 pandemic: There doesnt seem to be any distinct changes in the expected electricity and gas usage during the covid pandemic if we use the prior year as a reference

Create at least one map that highlights which neighborhoods experienced the greatest change in electricity consumption before and after the pandemic began, and comment on your results.

Below is the energy consumption density within the bay area for June 2019 and June 2020. Both density graphs look pretty much identical, with the pandemic graph only showing slighlty more energy consumption

ca_counties <- readRDS("ca_counties.rds")

bay_county_names <-
  c(
    "Alameda",
    "Contra Costa",
    "Marin",
    "Napa",
    "San Francisco",
    "San Mateo",
    "Santa Clara",
    "Solano",
    "Sonoma"
  )
bay_counties <-
  ca_counties %>%
  filter(NAME %in% bay_county_names)

bay_cbgs <- block_groups("CA", bay_county_names[1:9], cb = T, progress_bar = F)
usa_zips <- zctas(cb = T, progress_bar = F)


bay_zips <-
  usa_zips %>%
  st_centroid() %>%
  .[bay_counties, ] %>%
  st_set_geometry(NULL) %>%
  left_join(usa_zips %>% select(GEOID10)) %>%
  st_as_sf()

Elec2019 <- filter(ElecMutate, YEAR %in% c("2019"))
Elec2020 <- filter(ElecMutate, YEAR %in% c("2020"))

Elec2019 <-     #2020
  Elec2019 %>%  #2020
  mutate(
    ZIPCODE = ZIPCODE %>% as.character()
  ) %>%
  mutate(
    ZIPCODE = ZIPCODE %>% as.character()
  ) %>%
  group_by(ZIPCODE) %>%
  summarize(
    TOTALKWH = sum(TOTALKWH, na.rm = T)
  ) %>%
  right_join(
    bay_zips %>% select(GEOID10),
    by = c("ZIPCODE" = "GEOID10")
  ) %>%
  st_as_sf() %>%
  st_transform(4326)

res_pal <- colorNumeric(
  palette = "Blues",
  domain =
    Elec2019$TOTALKWH   #2020
)

leaflet() %>%
  addTiles() %>%
  addPolygons(
    data = Elec2019,        #2020
    fillColor = ~res_pal(TOTALKWH),
    color = "white",
    opacity = 0.5,
    fillOpacity = 0.5,
    weight = 1,
    label = ~paste0(
      round(TOTALKWH),
      " kWh total in ",
      ZIPCODE
    ),
    highlightOptions = highlightOptions(
      weight = 2,
      opacity = 1
    )
  ) %>%
  addLegend(
    data = Elec2019,   #2020
    pal = res_pal,
    values = ~TOTALKWH,
    title = "Energy usage <br>kWh, June 2019"
  )

The next plot shows energy usage in the bay area during the pandemic June 2020

Elec2020<-
  Elec2020 %>%  
  mutate(
    ZIPCODE = ZIPCODE %>% as.character()
  ) %>%
  mutate(
    ZIPCODE = ZIPCODE %>% as.character()
  ) %>%
  group_by(ZIPCODE) %>%
  summarize(
    TOTALKWH = sum(TOTALKWH, na.rm = T)
  ) %>%
  right_join(
    bay_zips %>% select(GEOID10),
    by = c("ZIPCODE" = "GEOID10")
  ) %>%
  st_as_sf() %>%
  st_transform(4326)

res_pal <- colorNumeric(
  palette = "Blues",
  domain =
    Elec2020$TOTALKWH   #2020
)

leaflet() %>%
  addTiles() %>%
  addPolygons(
    data = Elec2020,        #2020
    fillColor = ~res_pal(TOTALKWH),
    color = "white",
    opacity = 0.5,
    fillOpacity = 0.5,
    weight = 1,
    label = ~paste0(
      round(TOTALKWH),
      " kWh total in ",
      ZIPCODE
    ),
    highlightOptions = highlightOptions(
      weight = 2,
      opacity = 1
    )
  ) %>%
  addLegend(
    data = Elec2020,   #2020
    pal = res_pal,
    values = ~TOTALKWH,
    title = "Energy usage <br>kWh, June 2020"
  )

Explain any key assumptions you made in the analysis, or caveats about the data sources that you think the reader should be aware of. Publish all of this work in a web report.

Key assumptions/caveats. We are only able to compare and contrast the energy usage on a census block scale because so we are not able to see variations from one neighborhood to the next.